import numpy as np
import pandas as pd
import datetime
import math
from pandas import read_excel
from scipy.optimize import minimize
#Import matplotlib
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
#plt.style.use('fivethirtyeight')
%matplotlib inline
# Import Prophet
import plotly.offline as py
#import plotly.express as px
from fbprophet import Prophet
from fbprophet.plot import plot_plotly, add_changepoints_to_plot
from datetime import datetime, timedelta
import logging
logging.getLogger('fbprophet').setLevel(logging.WARNING)
# hide warnings
import warnings
warnings.filterwarnings('ignore')
import requests
import io
# --- Download the Pakistan COVID-19 time series from GitHub -----------------
# SECURITY: the personal access token must never be written into the source
# file; anyone who can read the file can use it. It is read from the
# GITHUB_TOKEN environment variable instead. Any token that was previously
# committed here should be revoked on GitHub.
import os

# Username of GitHub account
username = 'abdulbarimalik'
# Personal Access Token (PAT) from GitHub account; empty string when not set
token = os.environ.get('GITHUB_TOKEN', '')
# Re-usable session object; credentials are attached only when a token exists
github_session = requests.Session()
if token:
    github_session.auth = (username, token)
# Downloading the csv file from GitHub (must be the raw version of the file).
# NOTE(review): the "?token=" query value in this URL looks like a credential
# as well -- confirm whether it is still needed and keep it out of the source.
url = "https://raw.githubusercontent.com/abdulbarimalik/COVID19/master/Covid-19-PAK.csv?token=AB6MNPZN2XR4SASSW4JSSO26YS46Q"
response = github_session.get(url)
# Stop with a clear HTTP error instead of handing an error page to pandas
response.raise_for_status()
download = response.content
# Reading the downloaded content and making it a pandas dataframe
full_table = pd.read_csv(io.StringIO(download.decode('utf-8')))
# Read data file (alternative sources kept for reference)
#full_table = pd.read_csv("https://raw.githubusercontent.com/abdulbarimalik/COVID19PAK/master/Covid-19-PAK.csv")
#full_table = pd.read_excel('F:/Data Sets/COVID-19/Pakistan/time series/COVID-19_DATA.xlsx',sheet_name='TimeSeries_KeyIndicators')

# Add Active Cases count: positives minus expired minus discharged
full_table['Active'] = (
    full_table['Cumulative Test positive']
    - full_table['Expired']
    - full_table['Discharged']
)
# Set Date into Pandas date format
full_table['Date'] = pd.to_datetime(full_table['Date'])
# Rename data frame columns to the names used by the rest of the analysis;
# 'Date' keeps its datetime dtype through the rename
fulltable = full_table.rename(columns={"Cumulative Test positive": "Confirmed",
                                       "Expired": "Deaths",
                                       "Discharged": "Recovered"})

# Latest condensed view: only the rows of the most recent date
full_latest = fulltable[fulltable['Date'] == fulltable['Date'].max()].reset_index()
# Columns are selected with a list; selecting several columns with a bare
# tuple after groupby is not accepted by newer pandas releases
full_table_grouped = (
    full_latest
    .groupby('Region')[['Confirmed', 'Deaths', 'Recovered', 'Active']]
    .sum()
    .reset_index()
)

# Summary table ordered by confirmed cases, with a mortality percentage column
temp_f = full_table_grouped.sort_values(by='Confirmed', ascending=False)
temp_f = temp_f[['Region', 'Confirmed', 'Active', 'Deaths', 'Recovered']]
temp_f = temp_f.reset_index(drop=True)
temp_f["Mortality Rate (per 100)"] = np.round(100*temp_f["Deaths"]/temp_f["Confirmed"],2)
# Colour every numeric column with its own gradient (rendered by the notebook)
(
    temp_f.style
    .background_gradient(cmap="Reds", subset=['Confirmed'])
    .background_gradient(cmap="Greens", subset=['Recovered'])
    .background_gradient(cmap="Greys", subset=['Deaths'])
    .background_gradient(cmap="Blues", subset=['Active'])
    .background_gradient(cmap='YlOrBr', subset=["Mortality Rate (per 100)"])
)
def daily_increase(data):
    """Return the step-by-step increases of a cumulative sequence.

    Args:
        data: any iterable of numbers (list, tuple, pandas Series, ...).
            Values are read in iteration order, so a Series works whatever
            its index labels are.

    Returns:
        A list of the same length as *data*: the first element is the first
        value itself, every later element is the difference to its
        predecessor. An empty input gives an empty list.
    """
    values = list(data)
    if not values:
        return []
    return [values[0]] + [curr - prev for prev, curr in zip(values, values[1:])]
def autolabel(rects):
    """Write the height of every bar in *rects* as a label above that bar."""
    for bar in rects:
        bar_height = bar.get_height()
        bar_centre_x = bar.get_x() + bar.get_width() / 2
        plt.annotate(
            '{}'.format(bar_height),
            xy=(bar_centre_x, bar_height),
            xytext=(0, 7),  # lift the label 7 points above the bar top
            textcoords="offset points",
            ha='center',
            va='bottom',
            size=12,
        )
# Bar chart of the latest cumulative Confirmed / Deaths / Recovered per region.
# DataFrame.plot opens its own figure (sized by ``figsize``), so no separate
# plt.figure() call is made -- that would only leave an empty extra figure.
pr_plot = full_table_grouped.plot(
    'Region',
    ['Confirmed', 'Deaths', 'Recovered'],
    kind='bar',
    color=['#FF3633', '#000000', '#8dc354'],
    figsize=(13, 8),
    title='COVID-19 Cases in Pakistan',
)
# Print each bar's height centred slightly above the bar
for p in pr_plot.patches:
    pr_plot.annotate(format(p.get_height()),
                     (p.get_x() + p.get_width() / 2., p.get_height()),
                     ha='center', va='center',
                     xytext=(0, 10), textcoords='offset points')
plt.ylabel('COVID-19 Count', size=12)
plt.xlabel('Region', size=12)
pr_plot.set_xticklabels(full_table_grouped['Region'], rotation=360)
plt.grid()
plt.show()
# COVID-19 Count in Pakistan
# Grouped bar chart: for every region the Confirmed, Deaths and Recovered
# totals are drawn side by side.

# Setting the positions and width for the bars
pos = list(range(len(full_table_grouped['Confirmed'])))
width = 0.25

# Plotting the bars
fig, ax = plt.subplots(figsize=(12,8))

# Confirmed cases at the base position of every region.
# Each series is labelled with the metric it shows; region names are the
# x tick labels set further down.
barplot1 = plt.bar(pos,
                   full_table_grouped['Confirmed'],
                   width,
                   alpha=0.5,
                   color='#FF3633',
                   label='Confirmed')
autolabel(barplot1)

# Deaths, shifted right by one bar width
barplot2 = plt.bar([p + width for p in pos],
                   full_table_grouped['Deaths'],
                   width,
                   alpha=0.5,
                   color='#000000',
                   label='Deaths')
autolabel(barplot2)

# Recovered, shifted right by two bar widths
barplot3 = plt.bar([p + width*2 for p in pos],
                   full_table_grouped['Recovered'],
                   width,
                   alpha=0.5,
                   color='#8dc354',
                   label='Recovered')
autolabel(barplot3)

# Set the axis labels
ax.set_ylabel('Total Cases')
ax.set_xlabel('Region')
# Set the chart's title
ax.set_title('COVID-19 Cases in Pakistan')
# Set the position of the x ticks
ax.set_xticks([p + 1.5 * width for p in pos])
# Set the labels for the x ticks
ax.set_xticklabels(full_table_grouped['Region'])
# Setting the x-axis and y-axis limits (5000 of head room for the labels)
plt.xlim(min(pos)-width, max(pos)+width*4)
plt.ylim([0, max(full_table_grouped['Confirmed'] + 5000)])
# Adding the legend and showing the plot
plt.legend(['Confirmed', 'Deaths', 'Recovered'], loc='upper left')
plt.grid()
plt.show()
# COVID-19 Daily Cases in Pakistan
import matplotlib.dates as mdates
# National totals per date
predgrp = fulltable.groupby("Date")[["Confirmed","Recovered","Deaths"]].sum().reset_index()
# 34 cumulative rows give 33 day-over-day increases
df_t = predgrp.tail(34)
df_cnfrm = df_t.reset_index().drop('index',axis=1)
df_cnfrm
pk_cnfrm_increase = daily_increase(df_cnfrm['Confirmed'])
# Drop the first entry: daily_increase returns the first cumulative value there
pk_cnfrm_increase = pk_cnfrm_increase[1:]
pk_cnfrm_increase
plt.figure(figsize=(20, 10))
# Dates come from the same window as the increases, so both always have the
# same length -- also when fewer than 34 dates are available
bar_plot = plt.bar(df_cnfrm['Date'].iloc[1:], pk_cnfrm_increase, width = 0.5, align = 'center', color = '#FF3633')
autolabel(bar_plot)
plt.title('Daily New COVID-19 Cases in {}'.format('Pakistan'), size=20)
plt.xlabel('Dates', size=20)
plt.ylabel('Number of Cases', size=20)
plt.xticks(size=13)
plt.yticks(size=15)
plt.show()
import matplotlib.dates as mdates
# National totals per date
predgrp = fulltable.groupby("Date")[["Confirmed","Recovered","Deaths"]].sum().reset_index()
# 34 cumulative rows give 33 day-over-day increases
df_t = predgrp.tail(34)
df_fat = df_t.reset_index().drop('index',axis=1)
pk_fat_increase = daily_increase(df_fat['Deaths'])
# Drop the first entry: daily_increase returns the first cumulative value there
pk_fat_increase = pk_fat_increase[1:]
pk_fat_increase
plt.figure(figsize=(15, 10))
# Dates come from the same window as the increases, so both always have the
# same length -- also when fewer than 34 dates are available
bar_plot = plt.bar(df_fat['Date'].iloc[1:], pk_fat_increase, color='#000000', width = 0.5, align = 'center')
autolabel(bar_plot)
plt.title('Daily COVID-19 Fatalities in {}'.format('Pakistan'), size=20)
plt.xlabel('Dates', size=20)
plt.ylabel('Number of Cases', size=20)
plt.xticks(size=10)
plt.yticks(size=15)
plt.show()
import matplotlib.dates as mdates
# National totals per date
predgrp = fulltable.groupby("Date")[["Confirmed","Recovered","Deaths"]].sum().reset_index()
# 34 cumulative rows give 33 day-over-day increases
df_t = predgrp.tail(34)
df_recov = df_t.reset_index().drop('index',axis=1)
pk_recov_increase = daily_increase(df_recov['Recovered'])
# Drop the first entry: daily_increase returns the first cumulative value there
pk_recov_increase = pk_recov_increase[1:]
plt.figure(figsize=(15, 10))
# Dates come from the same window as the increases, so both always have the
# same length -- also when fewer than 34 dates are available
bar_plot = plt.bar(df_recov['Date'].iloc[1:], pk_recov_increase, color = '#8dc354', width = 0.5, align = 'center')
autolabel(bar_plot)
plt.title('Daily COVID-19 Recovered Cases in {}'.format('Pakistan'), size=20)
plt.xlabel('Dates', size=20)
plt.ylabel('Number of Cases', size=20)
plt.xticks(size=10)
plt.yticks(size=15)
plt.show()
import plotly.express as px
import plotly.graph_objs as go
import plotly as py
from plotly import tools
from plotly.offline import iplot

# Interactive line chart of the daily national increases over the full period
fig = go.Figure()
pk_cnfrm_inc = daily_increase(predgrp['Confirmed'])
pk_fat_inc = daily_increase(predgrp['Deaths'])
pk_recov_inc = daily_increase(predgrp['Recovered'])
# One trace per metric, added in the order Cases, Deaths, Recoveries
for trace_name, trace_values in (('Cases', pk_cnfrm_inc),
                                 ('Deaths', pk_fat_inc),
                                 ('Recoveries', pk_recov_inc)):
    fig.add_trace(go.Scatter(x=predgrp['Date'],
                             y=trace_values,
                             mode='lines+markers',
                             name=trace_name))
fig.show()
# COVID-19 Daily Spread in Provinces (Last 30 Days)
def get_time_series_province(province):
    """Return the rows of the module-level ``fulltable`` whose Region is *province*."""
    region_mask = fulltable['Region'] == province
    return fulltable[region_mask]
def _plot_daily_province_bars(df, province, column, color, title):
    """Draw the last 30 daily increases of one cumulative column as a bar chart.

    Args:
        df: frame with a 'Date' column and the cumulative count *column*.
        province: name inserted into *title*.
        column: cumulative column to difference ('Confirmed', ...).
        color: bar colour.
        title: title template with one ``{}`` placeholder for the province.
    """
    # 31 cumulative rows give 30 day-over-day increases
    window = df.tail(31).reset_index().drop('index', axis=1)
    # The first entry returned by daily_increase is the starting total, not an increase
    increases = daily_increase(window[column])[1:]
    plt.figure(figsize=(15, 10))
    bar_plot = plt.bar(window[1:]['Date'], increases, color=color, width=0.5, align='center')
    autolabel(bar_plot)
    plt.title(title.format(province), size=20)
    plt.xlabel('Dates', size=15)
    plt.ylabel('Number of Cases', size=15)
    plt.xticks(size=10)
    plt.yticks(size=10)
    plt.show()


def daily_Cnfrm_Province(df, province):
    """Plot the daily new confirmed cases of *province* from its rows in *df*."""
    _plot_daily_province_bars(df, province, 'Confirmed', '#FF3633',
                              'Daily COVID-19 Confirmed Cases in {}')


def daily_Recov_Province(df, province):
    """Plot the daily new recovered cases of *province* from its rows in *df*."""
    _plot_daily_province_bars(df, province, 'Recovered', '#8dc354',
                              'Daily COVID-19 Recovered Cases in {}')


def daily_Fat_Province(df, province):
    """Plot the daily new fatalities of *province* from its rows in *df*."""
    _plot_daily_province_bars(df, province, 'Deaths', '#000000',
                              'Daily COVID-19 Fatalities in {}')
import warnings
warnings.filterwarnings('ignore')
# Draw the three daily charts (confirmed, recovered, fatalities) per province
provinces = ['Punjab', 'Sindh', 'KP', 'Balochistan', 'ICT', 'GB','AJK']
for province in provinces:
    df = get_time_series_province(province)
    for plot_daily in (daily_Cnfrm_Province, daily_Recov_Province, daily_Fat_Province):
        plot_daily(df, province)
# Pakistan Weekly Analysis
def Covid_Analysis_LastWeek(df):
    """Bar-plot the seven most recent daily increases for Pakistan.

    *df* needs 'Date', 'Confirmed', 'Recovered' and 'Deaths' columns; the
    three count columns are differenced with ``daily_increase``.
    """
    # 8 rows -> 7 day-over-day differences
    recent = df.tail(8).reset_index().drop('index', axis=1)
    plt.subplots(figsize=(12, 10))
    plot_dates = recent[1:]['Date']
    # NOTE(review): the three series share x positions and width, so later
    # bars are painted over earlier ones -- confirm this overlay is intended.
    series_styles = (('Confirmed', '#FF3633'), ('Recovered', '#8dc354'), ('Deaths', '#000000'))
    bar_groups = []
    for column, colour in series_styles:
        # the first entry of daily_increase is the starting total, so skip it
        increases = daily_increase(recent[column])[1:]
        bar_groups.append(plt.bar(plot_dates, increases, color=colour, width=0.25, align='center'))
    for bars in bar_groups:
        autolabel(bars)
    plt.title('COVID-19 Cases {} Last 7 days'.format('Pakistan'), size=10)
    plt.xlabel('Dates', size=10)
    plt.ylabel('Number of Cases', size=10)
    plt.xticks(size=10)
    plt.yticks(size=10)
    plt.legend(['Confirmed', 'Recovered', 'Deaths'], loc='upper left')
    plt.show()
# '#FF3633' | '#8dc354' | '#000000'
# def Covid_Analysis_Last24Hours(df):
# #df_t = df[df['Date'] > '2020-04-27']
# df_t = df.tail(2)
# width = 0.25
# # Plotting the bars
# fig, ax = plt.subplots(figsize=(12,8))
# df_pk = df_t.reset_index().drop('index',axis=1)
# pr_cnfrm_inc = daily_increase(df_pk['Confirmed'])
# pr_cnfrm_inc = pr_cnfrm_inc[1:]
# #pos = list(range(len(pr_cnfrm_inc['Confirmed'])))
# pr_fat_inc = daily_increase(df_pk['Deaths'])
# pr_fat_inc = pr_fat_inc[1:]
# pr_recov_inc = daily_increase(df_pk['Recovered'])
# pr_recov_inc = pr_recov_inc[1:]
# df_pk = df_pk[1:]
# bar_plot = plt.bar(df_pk['Region'], pr_cnfrm_inc, color = '#FF3633', width=0.25, align = 'center')
# bar_plot2 = plt.bar(df_pk['Region'], pr_recov_inc, color = '#8dc354', width=0.25, align = 'center')
# bar_plot3 = plt.bar(df_pk['Region'], pr_fat_inc, color = '#000000', width=0.25, align = 'center')
# autolabel(bar_plot)
# autolabel(bar_plot2)
# autolabel(bar_plot3)
# plt.title('COVID-19 Cases {} Last 7 days'.format('Pakistan'), size=10)
# plt.xlabel('Dates', size=10)
# plt.ylabel('Number of Cases', size=10)
# plt.xticks(size=10)
# plt.yticks(size=10)
# plt.legend(['Confirmed', 'Recovered', 'Deaths'], loc='upper left')
# plt.show()
import warnings
warnings.filterwarnings('ignore')
# National totals per date feed the last-week chart
national_columns = ["Confirmed", "Recovered", "Deaths"]
df = fulltable.groupby("Date")[national_columns].sum().reset_index()
Covid_Analysis_LastWeek(df)
# import warnings
# warnings.filterwarnings('ignore')
# df = fulltable.groupby("Region")[["Confirmed","Recovered","Deaths"]].sum().reset_index()
# Covid_Analysis_Last24Hours(df)
#df_t = df[df['Date'] > '2020-04-27']
# global fulltable
# df = fulltable[(fulltable['Region'] == 'Punjab')]
# df_t = df.tail(8)
# df_t
# #width = 0.25
# # Plotting the bars
# fig, ax = plt.subplots(figsize=(12,8))
# df_p = df_t.reset_index().drop('index',axis=1)
# p_cnfrm_inc = daily_increase(df_p['Confirmed'])
# p_cnfrm_inc = p_cnfrm_inc[1:]
# # #---------------------#
# dfs = fulltable[(fulltable['Region'] == 'Sindh')]
# df_t = dfs.tail(8)
# width = 0.25
# df_s = df_t.reset_index().drop('index',axis=1)
# s_cnfrm_inc = daily_increase(df_s['Confirmed'])
# s_cnfrm_inc = s_cnfrm_inc[1:]
# df_p = df_p[1:]
# bar_plot2 = plt.bar(df_p['Date'], s_cnfrm_inc, color = 'lime', width=0.25, align = 'center')
# bar_plot = plt.bar(df_p['Date'], p_cnfrm_inc, color = 'cornflowerblue', width=0.25, align = 'center')
# # bar_plot3 = plt.bar(df_pk['Date'], pr_fat_inc, color = 'r', width=0.25, align = 'center')
# autolabel(bar_plot)
# autolabel(bar_plot2)
# #autolabel(bar_plot3)
# plt.title('COVID-19 Cases {} Last 7 days'.format('Pakistan'), size=10)
# plt.xlabel('Dates', size=10)
# plt.ylabel('Number of Cases', size=10)
# plt.xticks(size=10)
# plt.yticks(size=10)
# plt.legend(['Punjab', 'Sindh'], loc='upper left')
# plt.show()
# Provincial Weekly Analysis, PAKISTAN
def Covid_Analysis_LastWeek_pr(df, province):
    """Bar-plot the seven most recent daily increases of one province.

    *df* needs 'Date', 'Confirmed', 'Recovered' and 'Deaths' columns;
    *province* is only used in the chart title.
    """
    # 8 rows -> 7 day-over-day differences
    recent = df.tail(8).reset_index().drop('index', axis=1)
    plt.subplots(figsize=(12, 8))
    plot_dates = recent[1:]['Date']
    # NOTE(review): the three series share x positions and width, so later
    # bars are painted over earlier ones -- confirm this overlay is intended.
    series_styles = (('Confirmed', '#FF3633'), ('Recovered', '#8dc354'), ('Deaths', '#000000'))
    bar_groups = []
    for column, colour in series_styles:
        # the first entry of daily_increase is the starting total, so skip it
        increases = daily_increase(recent[column])[1:]
        bar_groups.append(plt.bar(plot_dates, increases, color=colour, width=0.25, align='center'))
    for bars in bar_groups:
        autolabel(bars)
    plt.title('Weekly COVID-19 Cases in {}'.format(province), size=10)
    plt.xlabel('Dates', size=15)
    plt.ylabel('Number of Cases', size=15)
    plt.xticks(size=10)
    plt.yticks(size=10)
    plt.legend(['Confirmed', 'Recovered', 'Deaths'], loc='upper left')
    plt.show()
# One last-week chart for each province / territory
provinces = [
    'Punjab', 'Sindh', 'KP', 'Balochistan',
    'ICT', 'GB', 'AJK',
]
for province in provinces:
    df = get_time_series_province(province)
    Covid_Analysis_LastWeek_pr(df, province)
# Pakistan Weekly Increase
# National totals per date
predgrp = fulltable.groupby("Date")[["Confirmed","Recovered","Deaths"]].sum().reset_index()
pak_datewise = fulltable.groupby(["Date"]).agg({"Confirmed":'sum',"Recovered":'sum',"Deaths":'sum'})
# ISO week number of every date. Taken from each timestamp's isocalendar()
# because the DatetimeIndex.weekofyear attribute is deprecated and absent
# from newer pandas releases.
# NOTE(review): weeks are keyed by number only, so the same week number in
# two different years would be merged -- fine while the data spans one year.
pak_datewise["WeekofYear"] = [stamp.isocalendar()[1] for stamp in pak_datewise.index]
pak_datewise

# Cumulative totals on the last recorded day of every week, with the weeks
# kept in order of first appearance
weekly_last = (
    pak_datewise
    .groupby("WeekofYear", sort=False)[["Confirmed", "Recovered", "Deaths"]]
    .last()
)
pak_weekwise_confirmed = weekly_last["Confirmed"].tolist()
pak_weekwise_recovered = weekly_last["Recovered"].tolist()
pak_weekwise_deaths = weekly_last["Deaths"].tolist()
# Weeks are numbered 1, 2, 3, ... from the first week in the data
week_num_pak = list(range(1, len(weekly_last) + 1))
w = len(week_num_pak) + 1

plt.figure(figsize=(10,5))
plt.plot(week_num_pak,pak_weekwise_confirmed,linewidth=3,label="Weekly Growth of Confirmed Cases", color = '#FF3633')
plt.plot(week_num_pak,pak_weekwise_recovered,linewidth=3,label="Weekly Growth of Recovered Cases",color = '#8dc354')
plt.plot(week_num_pak,pak_weekwise_deaths,linewidth=3,label="Weekly Growth of Death Cases", color = '#000000')
plt.xlabel('Week Number')
plt.ylabel("Number of Cases")
plt.title("COVID-19 Weekly Growth in Pakistan")
plt.legend()
# COVID-19 Pakistan Weekly Analysis
import seaborn as sns


def _annotate_heights(bar_axes):
    """Print every bar's height centred slightly above the bar."""
    for p in bar_axes.patches:
        bar_axes.annotate(format(p.get_height()),
                          (p.get_x() + p.get_width() / 2., p.get_height()),
                          ha='center', va='center',
                          xytext=(0, 10), textcoords='offset points')


# Week-over-week increases; the first week has no predecessor and is set to 0
confirmed_weekly_inc = pd.Series(pak_weekwise_confirmed).diff().fillna(0)
recovered_weekly_inc = pd.Series(pak_weekwise_recovered).diff().fillna(0)
deaths_weekly_inc = pd.Series(pak_weekwise_deaths).diff().fillna(0)

print("Average weekly increase in number of Confirmed Cases", round(confirmed_weekly_inc.mean()))
print("Average weekly increase in number of Recovered Cases", round(recovered_weekly_inc.mean()))
print("Average weekly increase in number of Death Cases", round(deaths_weekly_inc.mean()))

# Side-by-side overview: confirmed (left) and fatalities (right)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
bar1 = sns.barplot(x=week_num_pak, y=confirmed_weekly_inc, ax=ax1)
_annotate_heights(bar1)
bar2 = sns.barplot(x=week_num_pak, y=deaths_weekly_inc, ax=ax2)
_annotate_heights(bar2)
ax1.set_xlabel("Week Number")
ax2.set_xlabel("Week Number")
ax1.set_ylabel("Number of Confirmed Cases")
ax2.set_ylabel("Number of Fatalities")
ax1.set_title("COVID-19 Weekly Increase in Pakistan (Confirmed Cases)")
ax2.set_title("COVID-19 Weekly Increase in Pakistan (Fatalities)")

# Stand-alone figure: weekly increase of confirmed cases
plt.figure(figsize=(12, 8))
bar1 = sns.barplot(x=week_num_pak, y=confirmed_weekly_inc)
_annotate_heights(bar1)
plt.xlabel("Week Number", size=15)
plt.ylabel("Number of Confirmed Cases", size=15)
plt.title("COVID-19 Weekly Increase in Pakistan (Confirmed Cases)", size =15)

# Stand-alone figure: weekly increase of fatalities
plt.figure(figsize=(12, 8))
bar2 = sns.barplot(x=week_num_pak, y=deaths_weekly_inc)
_annotate_heights(bar2)
plt.xlabel("Week Number", size = 15)
plt.ylabel("Number of Fatalities", size = 15)
plt.title("COVID-19 Weekly Increase in Pakistan (Fatalities)", size = 15)
# Month-end view of the cumulative national counts after 1 March 2020
predgrp = fulltable.groupby("Date")[["Confirmed","Recovered","Deaths"]].sum().reset_index()
predgrp['Date'] = pd.to_datetime(predgrp['Date'])
after_first_march = predgrp['Date'] > '2020-03-01'
df_t = predgrp[after_first_march]
# Largest value of each calendar month
table = df_t.set_index('Date').resample('M').max()
table
table.plot(figsize=(10, 5), color=['#FF3633', '#8dc354', '#000000'])
plt.title('COVID-19 Pakistan Monthly Growth', size=14)
plt.xlabel('Months', size=14)
plt.ylabel('COVID-19 Count', size=14 )
plt.show()
# COVID-19 Pakistan Monthly Growth
# Bar chart of the month-end cumulative counts held in ``table``
ax = table.plot(kind = 'bar', figsize=(12,8), color = ['#FF3633','#8dc354','#000000'])
# Month names are derived from the table's date index so that the labels
# always match the months (and the number of bars) actually present
ax.set_xticklabels(table.index.strftime('%B'), rotation=360)
# Print each bar's height centred slightly above the bar
for p in ax.patches:
    ax.annotate(format(p.get_height()),
                (p.get_x() + p.get_width() / 2., p.get_height()),
                ha = 'center', va = 'center',
                xytext = (0, 10), textcoords = 'offset points')
plt.title('COVID-19 Pakistan Monthly Growth', size=14)
plt.xlabel('Months', size=14)
plt.ylabel('COVID-19 Count', size=14 )
plt.grid()
plt.show()
# COVID-19 Pakistan Monthly Increase
# National totals per date
predgrp = fulltable.groupby("Date")[["Confirmed","Recovered","Deaths"]].sum().reset_index()
predgrp['Date'] = pd.to_datetime(predgrp['Date'])
table = predgrp.set_index('Date')
# Largest (month-end) cumulative value of each calendar month ...
table = table.resample('M').max()
# ... differenced to get the cases added within each month
table = table.diff()
# Keep the four most recent months
table = table.tail(4)
table
ax = table.plot(kind = 'bar', figsize=(12,8), color = ['#FF3633','#8dc354','#000000'])
# Month names are derived from the table's date index so that the labels
# always match the months (and the number of bars) actually present
ax.set_xticklabels(table.index.strftime('%B'), rotation=360)
# Print each bar's height centred slightly above the bar
for p in ax.patches:
    ax.annotate(format(p.get_height()),
                (p.get_x() + p.get_width() / 2., p.get_height()),
                ha = 'center', va = 'center',
                xytext = (0, 10), textcoords = 'offset points')
plt.title('COVID-19 Monthly Cases in Pakistan', size=14)
plt.xlabel('Months', size=14)
plt.ylabel('COVID-19 Count', size=14 )
plt.legend(['Confirmed', 'Recovered', 'Deaths'], loc='upper left')
plt.grid()
plt.show()
# COVID-19 PAKISTAN TREND
import matplotlib.dates as mdates
# Daily national increases over the whole period, drawn as dashed lines
predgrp = fulltable.groupby("Date")[["Confirmed","Recovered","Deaths"]].sum().reset_index()
pk_recov_increase = daily_increase(predgrp['Recovered'])
pk_fat_inc = daily_increase(predgrp['Deaths'])
pk_cnfrm_increase = daily_increase(predgrp['Confirmed'])
plt.figure(figsize=(12, 8))
# Drawing order fixes the legend order: Confirmed, Recovered, Deaths
for daily_values, line_colour in ((pk_cnfrm_increase, '#FF3633'),
                                  (pk_recov_increase, '#8dc354'),
                                  (pk_fat_inc, '#000000')):
    plot = plt.plot(predgrp['Date'], daily_values,
                    color=line_colour, marker='o', linestyle='dashed')
plt.title('COVID-19 Growth Factor {}'.format('Pakistan'), size=15)
plt.xlabel('Dates', size=15)
plt.ylabel('COVID-19 Cases', size=15)
plt.xticks(size=10)
plt.yticks(size=10)
plt.legend(['Confirmed','Recovered','Deaths'])
plt.grid()
plt.show()
def _column_difference(dataset, column, interval):
    """Return ``value[i] - value[i - interval]`` for every row i >= interval.

    Values are read in row order, so the result does not depend on the
    index labels of *dataset*.
    """
    values = list(dataset[column])
    return [values[i] - values[i - interval] for i in range(interval, len(values))]


def difference_c(dataset, interval=1):
    """Return the *interval*-step differences of the 'Confirmed' column as a list."""
    return _column_difference(dataset, "Confirmed", interval)


def difference_e(dataset, interval=1):
    """Return the *interval*-step differences of the 'Deaths' column as a list."""
    return _column_difference(dataset, "Deaths", interval)


def difference_r(dataset, interval=1):
    """Return the *interval*-step differences of the 'Recovered' column as a list."""
    return _column_difference(dataset, "Recovered", interval)
#-----------------------------------#
# Day-over-day changes of the national totals, plotted against the day number
predgrp = fulltable.groupby("Date")[["Confirmed","Recovered","Deaths"]].sum().reset_index()
diffc = difference_c(predgrp)
diffe = difference_e(predgrp)
diffr = difference_r(predgrp)
plt.figure(figsize=(12, 8))
# Drawing order fixes the legend order: Confirmed, Deaths, Recovered
for day_changes, line_colour in ((diffc, '#FF3633'), (diffe, '#000000'), (diffr, '#8dc354')):
    plt.plot(day_changes, color=line_colour, marker='o', linestyle='dashed')
plt.title("COVID-19 Growth Factor Pakistan",size=15)
plt.xlabel('Days',size=15)
plt.ylabel('COVID-19 Cases',size=15)
plt.legend(['Confirmed','Deaths','Recovered'])
plt.grid()
plt.show()
def _column_difference(dataset, column, interval):
    """Return ``value[i] - value[i - interval]`` for every row i >= interval.

    Values are read in row order, so the result does not depend on the
    index labels of *dataset*.
    """
    values = list(dataset[column])
    return [values[i] - values[i - interval] for i in range(interval, len(values))]


def difference_c(dataset, interval=1):
    """Return the *interval*-step differences of the 'Confirmed' column as a list."""
    return _column_difference(dataset, "Confirmed", interval)


def difference_e(dataset, interval=1):
    """Return the *interval*-step differences of the 'Deaths' column as a list."""
    return _column_difference(dataset, "Deaths", interval)


def difference_r(dataset, interval=1):
    """Return the *interval*-step differences of the 'Recovered' column as a list."""
    return _column_difference(dataset, "Recovered", interval)
#-----------------------------------#
# Same day-over-day changes as plain solid lines (no markers)
predgrp = fulltable.groupby("Date")[["Confirmed","Recovered","Deaths"]].sum().reset_index()
diffc = difference_c(predgrp)
diffe = difference_e(predgrp)
diffr = difference_r(predgrp)
plt.figure(figsize=(12, 8))
# Drawing order fixes the legend order: Confirmed, Deaths, Recovered
for day_changes, line_colour in ((diffc, '#FF3633'), (diffe, '#000000'), (diffr, '#8dc354')):
    plt.plot(day_changes, color=line_colour)
plt.title("COVID-19 Growth Factor Pakistan",size=15)
plt.xlabel('Days',size=15)
plt.ylabel('COVID-19 Cases',size=15)
plt.legend(['Confirmed','Deaths','Recovered'])
plt.grid()
plt.show()
# Pakistan National Forecast using Prophet
# National totals per date, split into the two-column frames Prophet is fed
predgrp = fulltable.groupby("Date")[["Confirmed","Recovered","Deaths"]].sum().reset_index()
pred_cnfrm = predgrp.loc[:, ["Date", "Confirmed"]]
pred_exp = predgrp.loc[:, ["Date", "Deaths"]]
# c_data / e_data are the same objects as pred_cnfrm / pred_exp; the columns
# are renamed to 'ds' (date) and 'y' (value) before fitting
c_data, e_data = pred_cnfrm, pred_exp
c_data.columns = ['ds', 'y']
e_data.columns = ['ds', 'y']

# Two panels: confirmed forecast on the left, fatalities on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 7))

# Confirm Count: fit, extend the dates by 15 periods, predict and plot
mc = Prophet(daily_seasonality=True)
mc.fit(c_data)
future_c = mc.make_future_dataframe(periods=15)
forecast_c = mc.predict(future_c)
fig1 = mc.plot(forecast_c, xlabel='Date', ylabel='Pakistan Confirmed Count', ax=ax1)

# Expiry Count: same steps on the deaths series
me = Prophet(daily_seasonality=True)
me.fit(e_data)
future_e = me.make_future_dataframe(periods=15)
forecast_e = me.predict(future_e)
fig2 = me.plot(forecast_e, xlabel='Date', ylabel='Pakistan Fatalities', ax=ax2)
# Prophet Provincial Model
def prophet_prov_forecast(df, province):
    """Fit Prophet to one province and plot 15-period forecasts.

    *df* holds the province's rows ('Date', 'Confirmed', 'Recovered',
    'Deaths'). Confirmed cases go on the left panel and fatalities on the
    right; *province* is used in the y-axis labels.
    """
    predgrp = df.groupby("Date")[["Confirmed", "Recovered", "Deaths"]].sum().reset_index()
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 7))
    panels = (
        ("Confirmed", province + ' Confirmed Count', ax1),
        ("Deaths", province + ' Fatalities', ax2),
    )
    for column, y_label, panel_axis in panels:
        # Prophet is fed a two-column frame named 'ds' (date) and 'y' (value)
        series = predgrp.loc[:, ["Date", column]]
        series.columns = ['ds', 'y']
        model = Prophet(daily_seasonality=True)
        model.fit(series)
        future = model.make_future_dataframe(periods=15)
        forecast = model.predict(future)
        model.plot(forecast, xlabel='Date', ylabel=y_label, ax=panel_axis)
def get_time_series_province_pr(province):
    """Return the rows of the module-level ``fulltable`` whose Region is *province*."""
    is_requested_region = fulltable['Region'] == province
    return fulltable[is_requested_region]
import warnings
warnings.filterwarnings('ignore')
# Prophet forecasts for the four provinces
provinces = [
    'Punjab',
    'Sindh',
    'KP',
    'Balochistan',
]
for province in provinces:
    df = get_time_series_province_pr(province)
    prophet_prov_forecast(df, province)
import warnings
warnings.filterwarnings('ignore')
# Prophet forecasts for the remaining regions
provinces = [
    'ICT',
    'GB',
    'AJK',
]
for province in provinces:
    df = get_time_series_province_pr(province)
    prophet_prov_forecast(df, province)
# Prophet National Model
# Confirm Count
# Fit a fresh Prophet model on the national confirmed series (module-level
# c_data with columns 'ds' and 'y').
mc = Prophet(daily_seasonality=True)
mc.fit(c_data)
# Extend the dates by 15 periods and predict over the whole range
future_c = mc.make_future_dataframe(periods=15)
forecast_c = mc.predict(future_c)
#py.init_notebook_mode()
fig1 = mc.plot(forecast_c,xlabel='Date',ylabel='Pakistan Confirmed Count')
axes = fig1.get_axes()
axes[0].set_title('Prophet Confirmed Forecast for next 15 days', size=12)
# Detecting trend change points and marking them on the forecast figure
a = add_changepoints_to_plot(fig1.gca(), mc, forecast_c)
# Separate figure with the components of the confirmed forecast
fig2 = mc.plot_components(forecast_c)
# Expiry Count
# Same steps on the national deaths series (module-level e_data).
# Note that fig1, axes, a and fig2 are re-used and overwritten below.
me = Prophet(daily_seasonality=True)
me.fit(e_data)
future_e = me.make_future_dataframe(periods=15)
forecast_e = me.predict(future_e)
#py.init_notebook_mode()
fig1 = me.plot(forecast_e,xlabel='Date',ylabel='Pakistan Fatalities')
axes = fig1.get_axes()
axes[0].set_title('Prophet Fatalities Forecast for next 15 days', size=12)
# Detecting trend change points and marking them on the forecast figure
a = add_changepoints_to_plot(fig1.gca(), me, forecast_e)
# Separate figure with the components of the fatalities forecast
fig2 = me.plot_components(forecast_e)
# Pakistan National Forecast using ARIMA
# Re-import the datetime *module*: an earlier `from datetime import datetime,
# timedelta` bound the name `datetime` to the class, while the forecast code
# further down calls `datetime.timedelta(...)` on the module.
import datetime
# NOTE(review): newer statsmodels releases appear to no longer ship
# `statsmodels.tsa.arima_model` -- confirm the installed version before
# upgrading; the fit()/forecast() calls below are written for this older API.
from statsmodels.tsa.arima_model import ARIMA
# `global` at module level has no effect; the name is already module-level
global fulltable
# National totals per date
predgrp = fulltable.groupby("Date")[["Confirmed","Recovered","Deaths"]].sum().reset_index()
# Confirmed series as a two-column frame
pred_cnfrm = predgrp.loc[:,["Date","Confirmed"]]
# pr_data is the same object as pred_cnfrm; columns renamed to 'ds' / 'y'.
# Later cells read pr_data['ds'].max() as the start of the forecast dates.
pr_data = pred_cnfrm
pr_data.columns = ['ds','y']
import warnings
warnings.filterwarnings('ignore')


def _fit_arima_forecast(series, steps=15):
    """Fit ARIMA(5, 1, 0) with a constant to *series* and forecast *steps* ahead.

    Returns:
        (fitted results, raw forecast output, list built from element 0 of
        the forecast output -- the values that get plotted).
    """
    fitted = ARIMA(series, order=(5, 1, 0)).fit(trend='c', full_output=True, disp=True)
    forecast = fitted.forecast(steps=steps)
    return fitted, forecast, list(forecast[0])


# National totals per date
predgrp = fulltable.groupby("Date")[["Confirmed","Recovered","Deaths"]].sum().reset_index()

# Confirm Cases ('ds' = date, 'y' = cumulative count)
pred_cnfrm = predgrp.loc[:, ["Date", "Confirmed"]]
pr_data_c = pred_cnfrm
pr_data_c.columns = ['ds', 'y']
arima_c, forecast_c, pred_c = _fit_arima_forecast(pr_data_c['y'])

# Expired Cases
pred_exp = predgrp.loc[:, ["Date", "Deaths"]]
pr_data_e = pred_exp
pr_data_e.columns = ['ds', 'y']
arima_e, forecast_e, pred_e = _fit_arima_forecast(pr_data_e['y'])

# Recovered Cases
pred_rec = predgrp.loc[:, ["Date", "Recovered"]]
pr_data_r = pred_rec
pr_data_r.columns = ['ds', 'y']
arima_r, forecast_r, pred_r = _fit_arima_forecast(pr_data_r['y'])

# Plot Forecast
# The 15 forecast dates follow the last observed date. That date is read from
# the frame built in this block, so the block does not rely on a frame
# prepared elsewhere.
start_date = pr_data_c['ds'].max()
prediction_dates = [start_date + pd.Timedelta(days=offset) for offset in range(1, 16)]

plt.figure(figsize= (10,7))
plt.xlabel("Dates",fontsize = 15)
plt.ylabel('COVID-19 Pakistan ForeCast',fontsize = 15)
plt.title("Predicted Values for the next 15 Days" , fontsize = 20)
# For every metric: dashed forecast first, then the solid observed history
for forecast_values, history, forecast_colour, history_colour, series_name in (
        (pred_c, pr_data_c, 'c', 'blue', 'Confirmed'),
        (pred_e, pr_data_e, '#ff9999', 'red', 'Fatalities'),
        (pred_r, pr_data_r, 'lime', 'limegreen', 'Recovered')):
    plt.plot_date(y=forecast_values, x=prediction_dates, linestyle='dashed',
                  color=forecast_colour, label=series_name + ' Forecast')
    plt.plot_date(y=history['y'], x=history['ds'], linestyle='-',
                  color=history_colour, label=series_name)
plt.legend()
# import numpy as np
# global predgrp
# ts_log = np.log(fulltable)
# plt.title('Log of the data')
# plt.plot(ts_log)
# plt.show()
import warnings
warnings.filterwarnings('ignore')

# National ARIMA(5, 1, 0) models, 15 forecast steps each, for confirmed cases,
# fatalities and recoveries.  Confirmed and fatalities are drawn side by side
# below; the recovered forecast is computed but not plotted in this cell.

# Country-wide totals per date, summed over every row of `fulltable`.
predgrp = fulltable.groupby("Date")[["Confirmed", "Recovered", "Deaths"]].sum().reset_index()

# Confirmed cases, renamed to the ds/y column layout.
pred_cnfrm = predgrp.loc[:, ["Date", "Confirmed"]]
pr_data_c = pred_cnfrm
pr_data_c.columns = ['ds', 'y']
arima_c = ARIMA(pr_data_c['y'], order=(5, 1, 0))
arima_c = arima_c.fit(trend='c', full_output=True, disp=True)
forecast_c = arima_c.forecast(steps=15)
# Element 0 of the forecast result is used as the point forecasts.
pred_c = list(forecast_c[0])

# Fatalities
pred_exp = predgrp.loc[:, ["Date", "Deaths"]]
pr_data_e = pred_exp
pr_data_e.columns = ['ds', 'y']
arima_e = ARIMA(pr_data_e['y'], order=(5, 1, 0))
arima_e = arima_e.fit(trend='c', full_output=True, disp=True)
forecast_e = arima_e.forecast(steps=15)
pred_e = list(forecast_e[0])

# Recovered cases
pred_rec = predgrp.loc[:, ["Date", "Recovered"]]
pr_data_r = pred_rec
pr_data_r.columns = ['ds', 'y']
arima_r = ARIMA(pr_data_r['y'], order=(5, 1, 0))
arima_r = arima_r.fit(trend='c', full_output=True, disp=True)
forecast_r = arima_r.forecast(steps=15)
pred_r = list(forecast_r[0])

# Forecast dates continue from the last date of the series fitted in this cell
# (not from an unrelated frame left over from another cell).  pd.Timedelta is
# used so the cell does not depend on whether `datetime` currently names the
# module or the class.
start_date = pr_data_c['ds'].max()
prediction_dates = [start_date + pd.Timedelta(days=offset) for offset in range(1, 16)]

plt.figure(figsize=(20, 15))
ax1 = plt.subplot(221)
ax1.plot_date(y=pred_c, x=prediction_dates, linestyle='dashed', color='c', label='Confirmed Forecast')
ax1.plot_date(y=pr_data_c['y'], x=pr_data_c['ds'], linestyle='-', color='b', label='Confirmed')
ax1.set_title("Confirmed Cases Forecast for next 15 Days", fontsize=15)
ax1.set_xlabel("Dates", fontsize=13)
ax1.set_ylabel('COVID-19 Pakistan ForeCast', fontsize=13)
ax1.legend()
ax2 = plt.subplot(222)
ax2.plot_date(y=pred_e, x=prediction_dates, linestyle='dashed', color='#ff9999', label='Fatalities Forecast')
ax2.plot_date(y=pr_data_e['y'], x=pr_data_e['ds'], linestyle='-', color='r', label='Fatalities')
ax2.set_title("Fatalities Forecast for next 15 Days", fontsize=15)
ax2.set_xlabel("Dates", fontsize=13)
ax2.set_ylabel('COVID-19 Pakistan ForeCast', fontsize=13)
ax2.legend();
Confirmed Cases Forecast
# ARIMA(5, 1, 0) on the `y` column of `pr_data`, with a 15-step forecast.
# Alternative order tried previously: (3, 2, 0).
arima = ARIMA(pr_data['y'], order=(5, 1, 0))
arima = arima.fit(trend='c', full_output=True, disp=True)
forecast = arima.forecast(steps=15)
# Element 0 of the forecast result is used as the point forecasts.
pred = list(forecast[0])

# The 15 calendar days that follow the last observed date.  pd.Timedelta is
# used so the cell does not depend on whether `datetime` currently names the
# module or the class.
start_date = pr_data['ds'].max()
prediction_dates = [start_date + pd.Timedelta(days=offset) for offset in range(1, 16)]

plt.figure(figsize=(10, 7))
plt.xlabel("Dates", fontsize=15)
plt.ylabel('Confirmed Cases', fontsize=15)
plt.title("ARIMA Forecast for next 15 Days", fontsize=20)
plt.plot_date(y=pred, x=prediction_dates, linestyle='dashed', color='c', label='Confirmed Forecast')
plt.plot_date(y=pr_data['y'], x=pr_data['ds'], linestyle='-', color='blue', label='Confirmed')
plt.legend();

# Mean absolute in-sample residual of the fitted model.
residuals = arima.resid
mae = np.mean(np.abs(residuals))
mae
Fatalities Forecast
# Fatalities: ARIMA(5, 1, 0) on the `Deaths` column of `predgrp`.
# NOTE: this rebinds the shared name `pr_data` to the fatalities series, so any
# later cell that reads `pr_data` sees deaths, not confirmed cases.
pred_exp = predgrp.loc[:, ["Date", "Deaths"]]
pr_data = pred_exp
pr_data.columns = ['ds', 'y']
# Alternative order tried previously: (3, 2, 0).
arima = ARIMA(pr_data['y'], order=(5, 1, 0))
arima = arima.fit(trend='c', full_output=True, disp=True)
forecast = arima.forecast(steps=15)
# Element 0 of the forecast result is used as the point forecasts.
pred = list(forecast[0])

# The 15 calendar days that follow the last observed date.  pd.Timedelta is
# used so the cell does not depend on whether `datetime` currently names the
# module or the class.
start_date = pr_data['ds'].max()
prediction_dates = [start_date + pd.Timedelta(days=offset) for offset in range(1, 16)]

plt.figure(figsize=(10, 7))
plt.xlabel("Dates", fontsize=15)
plt.ylabel('Fatalities', fontsize=15)
plt.title("ARIMA Forecast for next 15 Days", fontsize=20)
plt.plot_date(y=pred, x=prediction_dates, linestyle='dashed', color='#ff9999', label='Fatalities Forecast')
plt.plot_date(y=pr_data['y'], x=pr_data['ds'], linestyle='-', color='red', label='Fatalities')
plt.legend();

# Mean absolute in-sample residual of the fitted model.
residuals = arima.resid
mae = np.mean(np.abs(residuals))
mae
Recovered Forecast
import warnings
warnings.filterwarnings('ignore')

# Recovered cases: ARIMA(5, 1, 0) on the `Recovered` column of `predgrp`.
pred_rec = predgrp.loc[:, ["Date", "Recovered"]]
pr_data_r = pred_rec
pr_data_r.columns = ['ds', 'y']
# Alternative order tried previously: (3, 2, 0).
arima_r = ARIMA(pr_data_r['y'], order=(5, 1, 0))
arima_r = arima_r.fit(trend='c', full_output=True, disp=True)
forecast_r = arima_r.forecast(steps=15)
# Element 0 of the forecast result is used as the point forecasts.
pred_r = list(forecast_r[0])

# The 15 calendar days that follow the last observed date.  pd.Timedelta is
# used so the cell does not depend on whether `datetime` currently names the
# module or the class.
start_date = pr_data_r['ds'].max()
prediction_dates = [start_date + pd.Timedelta(days=offset) for offset in range(1, 16)]

plt.figure(figsize=(10, 7))
plt.xlabel("Dates", fontsize=15)
plt.ylabel('Recovered', fontsize=15)
plt.title("Predicted Values for the next 15 Days", fontsize=20)
plt.plot_date(y=pred_r, x=prediction_dates, linestyle='dashed', color='lime', label='Recovered Forecast')
plt.plot_date(y=pr_data_r['y'], x=pr_data_r['ds'], linestyle='-', color='limegreen', label='Recovered')
plt.legend();

# Mean absolute in-sample residual of the fitted model.
residuals = arima_r.resid
mae = np.mean(np.abs(residuals))
mae
ARIMA : Provincial Model Pakistan
def ARIMA_Prov_old(predgrp, province):
    """Fit ARIMA(5, 1, 0) models for one region and plot all forecasts in one figure.

    predgrp  -- DataFrame for the region with the columns ``Date``,
                ``Confirmed``, ``Deaths`` and ``Recovered``
    province -- region label inserted into the y-axis caption

    Confirmed, fatalities and recovered series are each forecast 15 steps
    ahead and drawn together with the observed values.  Returns None.
    """
    def _fit_and_forecast(column):
        # Two-column copy in ds/y layout plus its 15 point forecasts
        # (element 0 of the forecast result).
        series = predgrp.loc[:, ["Date", column]]
        series.columns = ['ds', 'y']
        fitted = ARIMA(series['y'], order=(5, 1, 0)).fit(trend='c', full_output=True, disp=True)
        return series, list(fitted.forecast(steps=15)[0])

    pr_data_c, pred_c = _fit_and_forecast("Confirmed")
    pr_data_e, pred_e = _fit_and_forecast("Deaths")
    pr_data_r, pred_r = _fit_and_forecast("Recovered")

    # Forecast dates continue from this region's own last observation rather
    # than from a module-level frame.  pd.Timedelta avoids depending on
    # whether `datetime` currently names the module or the class.
    last_observed = pr_data_c['ds'].max()
    prediction_dates = [last_observed + pd.Timedelta(days=offset) for offset in range(1, 16)]

    plt.figure(figsize=(10, 7))
    plt.xlabel("Dates", fontsize=15)
    plt.ylabel('COVID-19 ' + province + ' ForeCast', fontsize=15)
    plt.title("ARIMA MODEL(Predicted Values for the next 15 Days)", fontsize=20)
    plt.plot_date(y=pred_c, x=prediction_dates, linestyle='dashed', color='c', label='Confirmed Forecast')
    plt.plot_date(y=pr_data_c['y'], x=pr_data_c['ds'], linestyle='-', color='blue', label='Confirmed')
    plt.plot_date(y=pred_e, x=prediction_dates, linestyle='dashed', color='#ff9999', label='Fatalities Forecast')
    plt.plot_date(y=pr_data_e['y'], x=pr_data_e['ds'], linestyle='-', color='red', label='Fatalities')
    plt.plot_date(y=pred_r, x=prediction_dates, linestyle='dashed', color='lime', label='Recovered Forecast')
    plt.plot_date(y=pr_data_r['y'], x=pr_data_r['ds'], linestyle='-', color='limegreen', label='Recovered')
    plt.legend();
def ARIMA_Prov(predgrp, province):
    """Fit ARIMA(5, 1, 0) models for one region and plot two forecast panels.

    predgrp  -- DataFrame for the region with the columns ``Date``,
                ``Confirmed`` and ``Deaths``
    province -- region label inserted into the y-axis captions

    Confirmed cases and fatalities are each forecast 15 steps ahead and drawn
    in side-by-side subplots together with the observed values.  Recovered
    cases are not modelled here.  Returns None.
    """
    def _fit_and_forecast(column):
        # Two-column copy in ds/y layout plus its 15 point forecasts
        # (element 0 of the forecast result).
        series = predgrp.loc[:, ["Date", column]]
        series.columns = ['ds', 'y']
        fitted = ARIMA(series['y'], order=(5, 1, 0)).fit(trend='c', full_output=True, disp=True)
        return series, list(fitted.forecast(steps=15)[0])

    pr_data_c, pred_c = _fit_and_forecast("Confirmed")
    pr_data_e, pred_e = _fit_and_forecast("Deaths")

    # Forecast dates continue from this region's own last observation rather
    # than from a module-level frame.  pd.Timedelta avoids depending on
    # whether `datetime` currently names the module or the class.
    last_observed = pr_data_c['ds'].max()
    prediction_dates = [last_observed + pd.Timedelta(days=offset) for offset in range(1, 16)]

    plt.figure(figsize=(20, 15))
    ax1 = plt.subplot(221)
    ax1.plot_date(y=pred_c, x=prediction_dates, linestyle='dashed', color='c', label='Confirmed Forecast')
    ax1.plot_date(y=pr_data_c['y'], x=pr_data_c['ds'], linestyle='-', color='b', label='Confirmed')
    ax1.set_title("Confirmed Cases Forecast for next 15 Days", fontsize=15)
    ax1.set_xlabel("Dates", fontsize=13)
    ax1.set_ylabel('COVID-19 ' + province + ' ForeCast', fontsize=13)
    ax1.legend()
    ax2 = plt.subplot(222)
    ax2.plot_date(y=pred_e, x=prediction_dates, linestyle='dashed', color='#ff9999', label='Fatalities Forecast')
    ax2.plot_date(y=pr_data_e['y'], x=pr_data_e['ds'], linestyle='-', color='r', label='Fatalities')
    ax2.set_title("Fatalities Forecast for next 15 Days", fontsize=15)
    ax2.set_xlabel("Dates", fontsize=13)
    ax2.set_ylabel('COVID-19 ' + province + ' ForeCast', fontsize=13)
    ax2.legend();
def get_time_series_province_pr(province):
    """Return the rows of the module-level `fulltable` whose Region equals `province`."""
    region_mask = fulltable['Region'] == province
    return fulltable[region_mask]
import warnings
warnings.filterwarnings('ignore')

# Two-panel ARIMA forecast for each of the four regions listed below.
# (ARIMA_Prov_old(df, province) would draw the single-figure variant instead.)
provinces = ['Punjab', 'Sindh', 'KP', 'Balochistan']
for province in provinces:
    df = get_time_series_province_pr(province)
    ARIMA_Prov(df, province)
import warnings
warnings.filterwarnings('ignore')

# Same two-panel ARIMA forecast for the two regions listed below.
provinces = ['ICT', 'GB']
for province in provinces:
    df = get_time_series_province_pr(province)
    ARIMA_Prov(df, province)
# AJK: ARIMA(5, 1, 0) forecasts for confirmed cases and fatalities, spelled
# out at module level instead of going through ARIMA_Prov.
province = 'AJK'
predgrp = get_time_series_province_pr(province)

# Confirmed cases
pred_cnfrm = predgrp.loc[:, ["Date", "Confirmed"]]
pr_data_c = pred_cnfrm
pr_data_c.columns = ['ds', 'y']
arima_c = ARIMA(pr_data_c['y'], order=(5, 1, 0))
arima_c = arima_c.fit(trend='c', full_output=True, disp=True)
forecast_c = arima_c.forecast(steps=15)
# Element 0 of the forecast result is used as the point forecasts.
pred_c = list(forecast_c[0])

# Fatalities.  NOTE: the `_rec` / `_r` names are kept from the original cell,
# but the series selected here is `Deaths`, and it is plotted as fatalities.
pred_rec = predgrp.loc[:, ["Date", "Deaths"]]
pr_data_r = pred_rec
pr_data_r.columns = ['ds', 'y']
arima_r = ARIMA(pr_data_r['y'], order=(5, 1, 0))
arima_r = arima_r.fit(trend='c', full_output=True, disp=True)
forecast_r = arima_r.forecast(steps=15)
pred_r = list(forecast_r[0])

# Forecast dates continue from the last AJK observation (not from an
# unrelated frame left over from another cell).  pd.Timedelta avoids
# depending on whether `datetime` currently names the module or the class.
start_date = pr_data_c['ds'].max()
prediction_dates = [start_date + pd.Timedelta(days=offset) for offset in range(1, 16)]

plt.figure(figsize=(20, 15))
ax1 = plt.subplot(221)
ax1.plot_date(y=pred_c, x=prediction_dates, linestyle='dashed', color='c', label='Confirmed Forecast')
ax1.plot_date(y=pr_data_c['y'], x=pr_data_c['ds'], linestyle='-', color='b', label='Confirmed')
ax1.set_title("Confirmed Cases Forecast for next 15 Days", fontsize=15)
ax1.set_xlabel("Dates", fontsize=13)
ax1.set_ylabel('COVID-19 ' + province + ' ForeCast', fontsize=13)
ax1.legend()
ax2 = plt.subplot(222)
ax2.plot_date(y=pred_r, x=prediction_dates, linestyle='dashed', color='#ff9999', label='Fatalities Forecast')
ax2.plot_date(y=pr_data_r['y'], x=pr_data_r['ds'], linestyle='-', color='red', label='Fatalities')
ax2.set_title("Fatalities Forecast for next 15 Days", fontsize=15)
ax2.set_xlabel("Dates", fontsize=13)
ax2.set_ylabel('COVID-19 ' + province + ' ForeCast', fontsize=13)
ax2.legend();
I am using a model from a marketing paper by Emmanuelle Le Nagard and Alexandre Steyer that attempts to reflect the social structure of a diffusion process.
import numpy as np
import pandas as pd
import datetime
# hide warnings
import warnings
warnings.filterwarnings('ignore')
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
COVID-19 Data Modeling for Pakistan
import io
import os

import requests

# GitHub credentials are read from the environment (GITHUB_USERNAME,
# GITHUB_TOKEN) so that no secret is stored in the notebook.
# NOTE(review): a personal access token (PAT) used to be hard-coded here; it
# should be treated as leaked and revoked.
username = os.environ.get('GITHUB_USERNAME', 'abdulbarimalik')
token = os.environ.get('GITHUB_TOKEN')

# Re-usable session; authentication is attached only when a token is available.
github_session = requests.Session()
if token:
    github_session.auth = (username, token)

# Raw-file URL of the CSV on GitHub.
# NOTE(review): the query string also carries a token - confirm it is disposable.
url = "https://raw.githubusercontent.com/abdulbarimalik/COVID19/master/Covid-19-PAK.csv?token=AB6MNPZN2XR4SASSW4JSSO26YS46Q"
response = github_session.get(url, timeout=30)
# Fail here on an HTTP error instead of handing an error page to the CSV parser.
response.raise_for_status()
download = response.content

# Parse the downloaded bytes into a DataFrame.
fulltable_pr = pd.read_csv(io.StringIO(download.decode('utf-8')))
#fulltable_pr = pd.read_excel('F:/Data Sets/COVID-19/Pakistan/time series/COVID-19_DATA.xlsx', sheet_name='TimeSeries_KeyIndicators')

# Dates are parsed before grouping so that the grouped rows come out in
# chronological order rather than in the sort order of the raw date strings.
fulltable_pr['Date'] = pd.to_datetime(fulltable_pr['Date'])
fulltable_pr = fulltable_pr.drop(['Discharged'], axis=1).groupby(['Date', 'Region']).sum().reset_index()
fulltable_pr = fulltable_pr.rename(columns={"Cumulative Test positive": "Confirmed", "Expired": "Deaths", "Discharged": "Recovered"})

# Latest snapshot: rows of the most recent date, totalled per region.
full_latest = fulltable_pr[fulltable_pr['Date'] == fulltable_pr['Date'].max()].reset_index()
# A list (not a bare tuple) of column names is required to select several
# columns from a groupby object on current pandas.
full_table_grouped = full_latest.groupby('Region')[['Confirmed', 'Deaths']].sum().reset_index()
temp_f = full_table_grouped.sort_values(by='Confirmed', ascending=False)
temp_f = temp_f[['Region', 'Confirmed', 'Deaths']]
temp_f = temp_f.reset_index(drop=True)
temp_f.style.background_gradient(cmap="Blues", subset=['Confirmed'])\
    .background_gradient(cmap="Reds", subset=['Deaths'])
Model Loss
import math
def model(N, a, alpha, t):
    """Evaluate the diffusion curve N * (1 - e**(-a*t)) ** alpha at time t.

    Each parameter is clamped before use: N and alpha to at least 0, and the
    exponent coefficient -a to at most 0.
    """
    ceiling = max(N, 0)
    decay = min(-a, 0)
    shape = max(alpha, 0)
    return ceiling * (1 - math.e ** (decay * t)) ** shape
# Column position (within the frame being fitted) that model_loss compares
# against when no explicit column is passed.
model_index = 0


def model_loss(params, data=None, column=None):
    """Return the root of the summed squared error between model() and observed data.

    params -- sequence (N, a, alpha) forwarded to model()
    data   -- frame whose rows are consecutive time steps; when omitted the
              module-level `df` is used, as before
    column -- positional column of `data` to compare against; when omitted the
              module-level `model_index` is used, as before

    The optional arguments allow callers to fit a specific frame, for example
    ``minimize(model_loss, x0, args=(frame, 0))``, without touching globals.
    """
    N, a, alpha = params
    frame = df if data is None else data
    col = model_index if column is None else column
    r = 0
    for t, observed in enumerate(frame.iloc[:, col]):
        r += (model(N, a, alpha, t) - observed) ** 2
    return math.sqrt(r)
We need to explore the 3-D parameter space (N, a, alpha) to find a minimum of the loss. scipy.optimize offers a number of algorithms for this; I stopped at the first one that seemed to work (Nelder-Mead, which is a derivative-free simplex search rather than gradient descent in the strict sense). Generalized Reduced Gradient, as used by the Excel solver, also works.
province = 'Punjab'


def display_fit_pr(df, opt_confirmed, opt_deaths, ax, province):
    """Plot the fitted diffusion curves next to the observed series.

    df            -- observed data indexed by date; joined column-wise with
                     the fitted curves for plotting
    opt_confirmed -- (N, a, alpha) parameters passed to model() for confirmed cases
    opt_deaths    -- (N, a, alpha) parameters passed to model() for deaths
    ax            -- matplotlib axes to draw on
    province      -- label prefixed to the plot title

    Returns the result of DataFrame.plot for the combined frame.
    """
    # One row per observed date: [date, fitted confirmed, fitted deaths].
    # No death-rate cap is applied here: every row lies inside the observed
    # range, so a check of the form `t > len(df)` can never be true.
    model_x = [
        [df.index[t], model(*opt_confirmed, t), model(*opt_deaths, t)]
        for t in range(len(df))
    ]
    model_sim = pd.DataFrame(model_x, dtype=int)
    model_sim.set_index(0, inplace=True)
    model_sim.columns = ['Model-Confirmed', 'Model-Deaths']
    plot_color = ['#99990077', '#FF000055', '#999900FF', '#FF0000FF']
    return pd.concat([model_sim, df], axis=1).plot(linestyle='-', marker='.', ax=ax, figsize=(14, 10), color=plot_color, title=province+' COVID-19 Cases')
def display_extended_curve_pr(df, opt_confirmed, opt_deaths, ax, province):
    """Plot model curves for the observed period plus 15 extra days, with the data.

    NOTE: a second function with this same name (without the `province`
    parameter) is defined later in the file; whichever definition is executed
    last is the one in effect.

    df            -- observed data indexed by date
    opt_confirmed -- (N, a, alpha) parameters passed to model() for confirmed cases
    opt_deaths    -- (N, a, alpha) parameters passed to model() for deaths
    ax            -- matplotlib axes to draw on
    province      -- label prefixed to the plot title
    """
    first_day = df.index[0]
    horizon = len(df) + 15
    rows = []
    rate_unset = True
    capped_rate = 0
    for step in range(horizon):
        row = [
            first_day + datetime.timedelta(days=step),
            model(*opt_confirmed, step),
            model(*opt_deaths, step),
        ]
        rows.append(row)
        # The plausibility check only applies when step > len(df).
        if step <= len(df):
            continue
        # Modelled deaths above 5% of modelled confirmed cases are rescaled
        # with the death rate of the row preceding the first such breach.
        if row[2] > row[1] * 0.05:
            if rate_unset:
                prior = rows[-2]
                capped_rate = prior[2] / prior[1]
                rate_unset = False
            row[2] = row[1] * capped_rate
    extended_model_sim = pd.DataFrame(rows, dtype=int)
    extended_model_sim.set_index(0, inplace=True)
    extended_model_sim.columns = ['Model-Confirmed', 'Model-Deaths']
    plot_color = ['#99990077', '#FF000055', '#999900FF', '#FF0000FF']
    combined = pd.concat([extended_model_sim, df], axis=1)
    return combined.plot(linestyle='-', marker='.', ax=ax, figsize=(14, 10), color=plot_color, title=province+' Forecast for 15 days using Diffusion & Gradient')
def display_extended_curve_conf(df, opt_confirmed, opt_deaths, ax, province):
    """Plot the confirmed-cases model curve (observed period + 15 days) with the data.

    df            -- observed data indexed by date; its 'Confirmed' column is plotted
    opt_confirmed -- (N, a, alpha) parameters passed to model() for confirmed cases
    opt_deaths    -- (N, a, alpha) parameters passed to model() for deaths; the
                     deaths curve is still computed but is not part of this plot
    ax            -- matplotlib axes to draw on
    province      -- label prefixed to the plot title
    """
    first_day = df.index[0]
    horizon = len(df) + 15
    rows = []
    rate_unset = True
    capped_rate = 0
    for step in range(horizon):
        row = [
            first_day + datetime.timedelta(days=step),
            model(*opt_confirmed, step),
            model(*opt_deaths, step),
        ]
        rows.append(row)
        # The plausibility check only applies when step > len(df).
        if step <= len(df):
            continue
        # Modelled deaths above 5% of modelled confirmed cases are rescaled
        # with the death rate of the row preceding the first such breach.
        if row[2] > row[1] * 0.05:
            if rate_unset:
                prior = rows[-2]
                capped_rate = prior[2] / prior[1]
                rate_unset = False
            row[2] = row[1] * capped_rate
    extended_model_sim = pd.DataFrame(rows, dtype=int)
    extended_model_sim.set_index(0, inplace=True)
    extended_model_sim.columns = ['Model-Confirmed', 'Model-Deaths']
    plot_color_c = ['#99990077', '#999900FF']
    confirmed_only = pd.concat([extended_model_sim['Model-Confirmed'], df['Confirmed']], axis=1)
    return confirmed_only.plot(linestyle='-', marker='.', ax=ax, figsize=(14, 10), color=plot_color_c, title=province+' Diffusion Process Confirmed Cases Forecast for 15 days')
def display_extended_curve_fat(df, opt_confirmed, opt_deaths, ax, province):
    """Plot the fatalities model curve (observed period + 15 days) with the data.

    df            -- observed data indexed by date; its 'Deaths' column is plotted
    opt_confirmed -- (N, a, alpha) parameters passed to model() for confirmed
                     cases; used as the reference for the death-rate check
    opt_deaths    -- (N, a, alpha) parameters passed to model() for deaths
    ax            -- matplotlib axes to draw on
    province      -- label prefixed to the plot title
    """
    first_day = df.index[0]
    horizon = len(df) + 15
    rows = []
    rate_unset = True
    capped_rate = 0
    for step in range(horizon):
        row = [
            first_day + datetime.timedelta(days=step),
            model(*opt_confirmed, step),
            model(*opt_deaths, step),
        ]
        rows.append(row)
        # The plausibility check only applies when step > len(df).
        if step <= len(df):
            continue
        # Modelled deaths above 5% of modelled confirmed cases are rescaled
        # with the death rate of the row preceding the first such breach.
        if row[2] > row[1] * 0.05:
            if rate_unset:
                prior = rows[-2]
                capped_rate = prior[2] / prior[1]
                rate_unset = False
            row[2] = row[1] * capped_rate
    extended_model_sim = pd.DataFrame(rows, dtype=int)
    extended_model_sim.set_index(0, inplace=True)
    extended_model_sim.columns = ['Model-Confirmed', 'Model-Deaths']
    plot_color_d = ['#FF000055', '#FF0000FF']
    deaths_only = pd.concat([extended_model_sim['Model-Deaths'], df['Deaths']], axis=1)
    return deaths_only.plot(linestyle='-', marker='.', ax=ax, figsize=(14, 10), color=plot_color_d, title=province+' Diffusion Process Fatalities Forecast for 15 days')
def display_extended_curve_pr(df, opt_confirmed, opt_deaths, ax, *, forecast_days=40):
    """Plot model curves for the observed period plus a forecast horizon, with the data.

    NOTE: an earlier function with this same name (taking an extra `province`
    argument) is defined above in the file; this definition replaces it once
    executed.

    df            -- observed data indexed by date
    opt_confirmed -- (N, a, alpha) parameters passed to model() for confirmed cases
    opt_deaths    -- (N, a, alpha) parameters passed to model() for deaths
    ax            -- matplotlib axes to draw on
    forecast_days -- keyword-only; number of days modelled beyond the observed
                     rows (default 40), also shown in the plot title
    """
    start_date = df.index[0]
    n_days = len(df) + forecast_days
    extended_model_x = []
    rate_unset = True
    last_death_rate = 0
    for t in range(n_days):
        row = [start_date + datetime.timedelta(days=t), model(*opt_confirmed, t), model(*opt_deaths, t)]
        extended_model_x.append(row)
        # The plausibility check only applies when t > len(df).
        if t <= len(df):
            continue
        # Modelled deaths above 5% of modelled confirmed cases are rescaled
        # with the death rate of the row preceding the first such breach.
        if row[2] > row[1] * 0.05:
            if rate_unset:
                previous = extended_model_x[-2]
                last_death_rate = previous[2] / previous[1]
                rate_unset = False
            row[2] = row[1] * last_death_rate
    extended_model_sim = pd.DataFrame(extended_model_x, dtype=int)
    extended_model_sim.set_index(0, inplace=True)
    extended_model_sim.columns = ['Model-Confirmed', 'Model-Deaths']
    plot_color = ['#99990077', '#FF000055', '#999900FF', '#FF0000FF']
    title = 'Forecast for next {} days'.format(forecast_days)
    return pd.concat([extended_model_sim, df], axis=1).plot(ax=ax, figsize=(14, 10), color=plot_color, title=title)
def display_extended_table_pr(df):
    """Fit the diffusion model to `df` and return the modelled values as a table.

    df -- observed data indexed by date; column 0 is fitted as confirmed
          cases and column 1 as deaths.  NOTE: if the last value of column 0
          is not higher than the one before it, the last row is dropped from
          `df` in place, so the caller's frame is modified.

    Returns a DataFrame indexed by date with the columns 'Model-Confirmed'
    and 'Model-Deaths'.  The number of rows is the length of `df` on entry
    (before any row is dropped) plus 15.
    """
    start_date = df.index[0]
    # Deliberately computed before the possible drop below.
    n_days = len(df) + 15

    # If the last data point repeats the previous one, or is lower, drop it.
    if len(df) > 1 and df.iloc[-2, 0] >= df.iloc[-1, 0]:
        df.drop(df.tail(1).index, inplace=True)

    def _fit_column(column):
        # Nelder-Mead fit of (N, a, alpha) against one positional column of the
        # frame passed to this function (not whatever `df` is at module level).
        def loss(params):
            N, a, alpha = params
            total = 0
            for t in range(len(df)):
                total += (model(N, a, alpha, t) - df.iloc[t, column]) ** 2
            return math.sqrt(total)
        return minimize(loss, x0=np.array([200000, 0.05, 15]), method='Nelder-Mead', tol=1e-5).x

    opt_confirmed = _fit_column(0)
    opt_deaths = _fit_column(1)

    extended_model_x = []
    rate_unset = True
    last_death_rate = 0
    for t in range(n_days):
        row = [start_date + datetime.timedelta(days=t), model(*opt_confirmed, t), model(*opt_deaths, t)]
        extended_model_x.append(row)
        # The plausibility check only applies when t > len(df).
        if t <= len(df):
            continue
        # Modelled deaths above 5% of modelled confirmed cases are rescaled
        # with the death rate of the row preceding the first such breach.
        if row[2] > row[1] * 0.05:
            if rate_unset:
                previous = extended_model_x[-2]
                last_death_rate = previous[2] / previous[1]
                rate_unset = False
            row[2] = row[1] * last_death_rate
    extended_model_sim = pd.DataFrame(extended_model_x, dtype=int)
    extended_model_sim.set_index(0, inplace=True)
    extended_model_sim.columns = ['Model-Confirmed', 'Model-Deaths']
    return extended_model_sim
from scipy.optimize import minimize
def opt_display_model_pr(df, stats, province):
    """Fit the diffusion model to `df` and plot confirmed / fatalities forecasts.

    df       -- observed data indexed by date; column 0 is fitted as confirmed
                cases and column 1 as deaths.  NOTE: if the last value of
                column 0 is not higher than the one before it, the last row is
                dropped from `df` in place.
    stats    -- list; when all fitted confirmed-case parameters are positive,
                one entry [province, N, a, alpha, N, a, alpha] is appended
    province -- label stored in `stats` and used in the plot titles

    Nothing is plotted when any fitted confirmed-case parameter is <= 0.
    Returns None.
    """
    # If the last data point repeats the previous one, or is lower, drop it.
    if len(df) > 1 and df.iloc[-2, 0] >= df.iloc[-1, 0]:
        df.drop(df.tail(1).index, inplace=True)

    def _fit_column(column):
        # Nelder-Mead fit of (N, a, alpha) against one positional column of the
        # frame passed to this function (not whatever `df` is at module level).
        def loss(params):
            N, a, alpha = params
            total = 0
            for t in range(len(df)):
                total += (model(N, a, alpha, t) - df.iloc[t, column]) ** 2
            return math.sqrt(total)
        return minimize(loss, x0=np.array([200000, 0.05, 15]), method='Nelder-Mead', tol=1e-5).x

    opt_confirmed = _fit_column(0)
    opt_deaths = _fit_column(1)

    if min(opt_confirmed) > 0:
        stats.append([province, *opt_confirmed, *opt_deaths])
        plt.figure(1)
        ax1 = plt.subplot(221)
        display_extended_curve_conf(df, opt_confirmed, opt_deaths, ax1, province)
        ax2 = plt.subplot(222)
        display_extended_curve_fat(df, opt_confirmed, opt_deaths, ax2, province)
        plt.show()
def display_fit(df, opt_confirmed, opt_deaths, ax):
    """Plot the fitted diffusion curves next to the observed series.

    df            -- observed data indexed by date; joined column-wise with
                     the fitted curves for plotting
    opt_confirmed -- (N, a, alpha) parameters passed to model() for confirmed cases
    opt_deaths    -- (N, a, alpha) parameters passed to model() for deaths
    ax            -- matplotlib axes to draw on

    Returns the result of DataFrame.plot for the combined frame.
    """
    # One row per observed date: [date, fitted confirmed, fitted deaths].
    # No death-rate cap is applied here: every row lies inside the observed
    # range, so a check of the form `t > len(df)` can never be true.
    model_x = [
        [df.index[t], model(*opt_confirmed, t), model(*opt_deaths, t)]
        for t in range(len(df))
    ]
    model_sim = pd.DataFrame(model_x, dtype=int)
    model_sim.set_index(0, inplace=True)
    model_sim.columns = ['Model-Confirmed', 'Model-Deaths']
    plot_color = ['#99990077', '#FF000055', '#999900FF', '#FF0000FF']
    return pd.concat([model_sim, df], axis=1).plot(ax=ax, figsize=(14, 10), color=plot_color)
from scipy.optimize import minimize
def opt_display_model(df, stats):
    """Fit the diffusion model to `df`, then plot the fit and the extended curve.

    df    -- observed data indexed by date; column 0 is fitted as confirmed
             cases and column 1 as deaths.  NOTE: if the last value of column
             0 is not higher than the one before it, the last row is dropped
             from `df` in place.
    stats -- list; when all fitted confirmed-case parameters are positive, one
             entry [province, N, a, alpha, N, a, alpha] is appended.
             NOTE: `province` is not a parameter here - it is read from module
             scope, so it must hold the intended label when this is called.

    Nothing is plotted when any fitted confirmed-case parameter is <= 0.
    Returns None.
    """
    # If the last data point repeats the previous one, or is lower, drop it.
    if len(df) > 1 and df.iloc[-2, 0] >= df.iloc[-1, 0]:
        df.drop(df.tail(1).index, inplace=True)

    def _fit_column(column):
        # Nelder-Mead fit of (N, a, alpha) against one positional column of the
        # frame passed to this function (not whatever `df` is at module level).
        def loss(params):
            N, a, alpha = params
            total = 0
            for t in range(len(df)):
                total += (model(N, a, alpha, t) - df.iloc[t, column]) ** 2
            return math.sqrt(total)
        return minimize(loss, x0=np.array([200000, 0.05, 15]), method='Nelder-Mead', tol=1e-5).x

    opt_confirmed = _fit_column(0)
    opt_deaths = _fit_column(1)

    if min(opt_confirmed) > 0:
        stats.append([province, *opt_confirmed, *opt_deaths])
        plt.figure(1)
        ax1 = plt.subplot(221)
        display_fit(df, opt_confirmed, opt_deaths, ax1)
        ax2 = plt.subplot(222)
        display_extended_curve_pr(df, opt_confirmed, opt_deaths, ax2)
        plt.show()
def get_time_series_province_pr(province):
    """Return the Confirmed/Deaths series of one region, indexed by Date.

    Rows are taken from the module-level `fulltable_pr` where Region equals
    `province`.  This definition replaces the earlier function of the same
    name once executed.
    """
    region_rows = fulltable_pr[fulltable_pr['Region'] == province]
    by_date = region_rows.set_index('Date')
    return by_date[['Confirmed', 'Deaths']]
Test Provincial level predictions
# Punjab: fit the diffusion model and plot the two forecast panels.
province = 'Punjab'
stats = []
df = get_time_series_province_pr('Punjab')
print('{} of Pakistan COVID-19 Prediction'.format('Punjab'))
opt_display_model_pr(df, stats, province)
#opt_display_model(df, stats)

# Fetch the series again (the call above may have dropped its last row in
# place) and tabulate observed values beside the modelled ones.
# NOTE: `province` is rebound from the label to the model table here.
df = get_time_series_province_pr('Punjab')
print('{} COVID-19 Prediction'.format('Punjab'))
province = display_extended_table_pr(df)
pd.options.display.float_format = '{:20,.0f}'.format
concat_df = pd.concat([df, province], axis=1)
# Last 30 of the rows that fall on an even day of the month.
concat_df[concat_df.index.day % 2 == 0].tail(30)
# Islamabad (region key 'ICT'): fit the diffusion model and plot the two
# forecast panels.
province = 'Islamabad'
stats = []
df = get_time_series_province_pr('ICT')
print('{} of Pakistan COVID-19 Prediction'.format('ICT'))
opt_display_model_pr(df, stats, province)

# Fetch the series again (the call above may have dropped its last row in
# place) and tabulate observed values beside the modelled ones.
# NOTE: `province` is rebound from the label to the model table here.
df = get_time_series_province_pr('ICT')
print('{} COVID-19 Prediction'.format('Islamabad'))
province = display_extended_table_pr(df)
pd.options.display.float_format = '{:20,.0f}'.format
concat_df = pd.concat([df, province], axis=1)
# Last 30 of the rows that fall on an even day of the month.
concat_df[concat_df.index.day % 2 == 0].tail(30)
# Sindh: fit the diffusion model and plot the two forecast panels.
province = 'Sindh'
stats = []
df = get_time_series_province_pr('Sindh')
print('{} of Pakistan COVID-19 Prediction'.format('Sindh'))
opt_display_model_pr(df, stats, province)

# Fetch the series again (the call above may have dropped its last row in
# place) and tabulate observed values beside the modelled ones.
# NOTE: `province` is rebound from the label to the model table here.
df = get_time_series_province_pr('Sindh')
print('{} COVID-19 Prediction'.format('Sindh'))
province = display_extended_table_pr(df)
pd.options.display.float_format = '{:20,.0f}'.format
concat_df = pd.concat([df, province], axis=1)
# Last 30 of the rows that fall on an even day of the month.
concat_df[concat_df.index.day % 2 == 0].tail(30)
# Balochistan: fit the diffusion model and plot the two forecast panels.
province = 'Balochistan'
stats = []
df = get_time_series_province_pr('Balochistan')
print('{} of Pakistan COVID-19 Prediction'.format('Balochistan'))
opt_display_model_pr(df, stats, province)

# Fetch the series again (the call above may have dropped its last row in
# place) and tabulate observed values beside the modelled ones.
# NOTE: `province` is rebound from the label to the model table here.
df = get_time_series_province_pr('Balochistan')
print('{} COVID-19 Prediction'.format('Balochistan'))
province = display_extended_table_pr(df)
pd.options.display.float_format = '{:20,.0f}'.format
concat_df = pd.concat([df, province], axis=1)
# Last 30 of the rows that fall on an even day of the month.
concat_df[concat_df.index.day % 2 == 0].tail(30)
# KPK (region key 'KP'): fit the diffusion model and plot the two forecast
# panels.
province = 'KPK'
stats = []
df = get_time_series_province_pr('KP')
print('{} of Pakistan COVID-19 Prediction'.format('KPK'))
opt_display_model_pr(df, stats, province)

# Fetch the series again (the call above may have dropped its last row in
# place) and tabulate observed values beside the modelled ones.
# NOTE: `province` is rebound from the label to the model table here.
df = get_time_series_province_pr('KP')
print('{} COVID-19 Prediction'.format('KPK'))
province = display_extended_table_pr(df)
pd.options.display.float_format = '{:20,.0f}'.format
concat_df = pd.concat([df, province], axis=1)
# Last 30 of the rows that fall on an even day of the month.
concat_df[concat_df.index.day % 2 == 0].tail(30)
# AJK: fit the diffusion model and plot the two forecast panels.
province = 'AJK'
stats = []
df = get_time_series_province_pr('AJK')
print('{} of Pakistan COVID-19 Prediction'.format('AJK'))
opt_display_model_pr(df, stats, province)

# Fetch the series again (the call above may have dropped its last row in
# place) and tabulate observed values beside the modelled ones.
# NOTE: `province` is rebound from the label to the model table here.
df = get_time_series_province_pr('AJK')
print('{} COVID-19 Prediction'.format('AJK'))
province = display_extended_table_pr(df)
pd.options.display.float_format = '{:20,.0f}'.format
concat_df = pd.concat([df, province], axis=1)
# Last 30 of the rows whose day of the month is a multiple of 3
# (the other regional cells filter on even days instead).
concat_df[concat_df.index.day % 3 == 0].tail(30)
# GB: fit the diffusion model and plot the two forecast panels.
province = 'GB'
stats = []
df = get_time_series_province_pr('GB')
print('{} of Pakistan COVID-19 Prediction'.format('GB'))
opt_display_model_pr(df, stats, province)

# Fetch the series again (the call above may have dropped its last row in
# place) and tabulate observed values beside the modelled ones.
# NOTE: `province` is rebound from the label to the model table here.
df = get_time_series_province_pr('GB')
print('{} COVID-19 Prediction'.format('GB'))
province = display_extended_table_pr(df)
pd.options.display.float_format = '{:20,.0f}'.format
concat_df = pd.concat([df, province], axis=1)
# Last 30 of the rows that fall on an even day of the month.
concat_df[concat_df.index.day % 2 == 0].tail(30)
# Country level: fit the diffusion model to the national daily totals.
stats = []
country = 'Pakistan'
# Only the numeric columns are selected before summing.  The text column
# 'Region' must not take part: on pandas versions where groupby().sum() no
# longer skips non-numeric columns it would become the first column of the
# result and shift the positional column lookups used by the fitting code.
df = fulltable_pr[['Date', 'Confirmed', 'Deaths']].groupby('Date').sum()
print('Pakistan COVID-19 Prediction')
opt_display_model_pr(df, stats, country)
Predictions Table
# Modelled values for the frame currently bound to `df`, shown beside the
# observed ones.  The table gets its own name so that the function `model()`,
# which every diffusion helper calls, is not overwritten by a DataFrame.
forecast_table = display_extended_table_pr(df)
pd.options.display.float_format = '{:20,.0f}'.format
concat_df = pd.concat([df, forecast_table], axis=1)
# Last 25 of the rows that fall on an odd day of the month.
concat_df[concat_df.index.day % 2 == 1].tail(25)
from statsmodels.tsa.statespace.sarimax import SARIMAX
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
#plt.style.use('fivethirtyeight')
%matplotlib inline
# SARIMA Confirmed: SARIMAX with order (3, 1, 0) on the national daily totals.
predgrp = fulltable.groupby("Date")[["Confirmed","Recovered","Deaths"]].sum().reset_index()
pred_cnfrm = pr_data_cf = predgrp.loc[:, ["Date", "Confirmed"]]
pr_data_cf.columns = ['ds', 'y']
arima = SARIMAX(pr_data_cf['y'], order=(3, 1, 0))
# NOTE: this rebinds the name `model`, which elsewhere in the file refers to
# the diffusion-curve function; the diagnostics cell reads `model` afterwards.
model = arima.fit()
forecast_c = model.forecast(steps=15)
# Forecast values as a one-column frame with a fresh 0..14 index.
df_c = forecast_c.reset_index().drop(columns='index')
pred_c = list(forecast_c)
pred_c
# SARIMA Fatalities: SARIMAX with order (3, 1, 0) on national deaths.
pred_exp = predgrp.loc[:, ["Date", "Deaths"]]
pr_data_e = pred_exp
pr_data_e.columns = ['ds', 'y']
arima_e = SARIMAX(pr_data_e['y'], order=(3, 1, 0))
# NOTE(review): `trend='c'` is passed to fit() here, whereas the confirmed-cases
# SARIMAX model is fitted without it - confirm that SARIMAX.fit honours it.
arima_e = arima_e.fit(trend='c', full_output=True, disp=True)
forecast_e = arima_e.forecast(steps=15)
pred_e = list(forecast_e)

# The 15 calendar days that follow the last observed date.  pd.Timedelta is
# used so the cell does not depend on whether `datetime` currently names the
# module or the class.
start_date = pr_data_cf['ds'].max()
prediction_dates = [start_date + pd.Timedelta(days=offset) for offset in range(1, 16)]

# Colours: '#FF3633' | '#8dc354' | '#000000' | '#ff9999'
# A single figure is created; a preceding plt.figure(1) call would only
# leave an extra, unused figure behind.
plt.figure(figsize=(20, 10))
ax1 = plt.subplot(221)
ax1.plot(pr_data_cf['ds'], pr_data_cf['y'], linewidth=3, linestyle='-', color='#FF3633')
ax1.plot(prediction_dates, pred_c, linewidth=3, linestyle='--', color='#ff9999')
ax2 = plt.subplot(222)
ax2.plot(pr_data_e['ds'], pr_data_e['y'], linewidth=3, linestyle='-', color='#000000')
ax2.plot(prediction_dates, pred_e, linewidth=3, linestyle='--', color='grey')
ax1.set_title('COVID-19 Pakistan Confirmed Cases Forecast using (SARIMA)', size=15)
ax1.set_xlabel('Dates', size=15)
ax1.set_ylabel('Confirmed', size=15)
ax2.set_title('COVID-19 Pakistan Fatalities Forecast using (SARIMA)', size=15)
ax2.set_xlabel('Dates', size=15)
ax2.set_ylabel('Fatalities', size=15)
# concat_df
# Assemble the SARIMAX forecasts into one table: date, confirmed, deaths.
dates = pd.DataFrame({'Dates': pd.to_datetime(prediction_dates)})
dates
ar_c = pd.DataFrame(pred_c, columns=['SARIMAX_Confirmed'])
ar_c
ar_e = pd.DataFrame(pred_e, columns=['SARIMAX_Deaths'], dtype=int)
ar_e
#extended_model_ar = pd.DataFrame(dates, pred_c, dtype=int)
#extended_model_ar.set_index(0, inplace=True)
#extended_model_ar.columns = ['ARIMA-Confirmed']
#extended_model_ar
#pr_data_c['y']
# Column-wise join; all three frames share the default 0..n-1 index.
concat_sar = pd.concat([dates, ar_c, ar_e], axis=1)
concat_sar
# from sklearn import metrics
# np.sqrt(metrics.mean_squared_error(pr_data_cf['y'], pred_c))
SARIMA Diagnostics
# Draw the diagnostic plots of the fitted results object bound to `model`.
# NOTE(review): presumably the SARIMAX confirmed-cases fit from the SARIMA
# cell - `model` is rebound in several places, so confirm the execution order.
model.plot_diagnostics()
plt.show()
from statsmodels.tsa.arima_model import ARIMA
# Compare ARIMA(p, 1, 0) fits for p = 1..6 on the `y` column of `pr_data`,
# printing AIC, BIC and the mean absolute in-sample residual of each.  The
# last figure is a mean absolute error, so it is labelled MAE (not RMSE).
arima_fits = {}
for ar_order in range(1, 7):
    print("ARIMA_{}".format(ar_order))
    fitted = ARIMA(pr_data['y'], order=(ar_order, 1, 0))
    fitted = fitted.fit(trend='c', full_output=True, disp=True)
    print("AIC: ", fitted.aic, ",", "BIC: ", fitted.bic)
    residuals = fitted.resid
    mae = np.mean(np.abs(residuals))
    print("MAE: ", mae)
    arima_fits[ar_order] = fitted

# Keep the individual result names that this cell has always defined.
arima1, arima2, arima_3, arima_4, arima5, arima_6 = (arima_fits[p] for p in range(1, 7))
# forecast = arima.forecast(steps= 15)
# pred = list(forecast[0])
# start_date = pr_data['ds'].max()
# prediction_dates = []
# for i in range(15):
# date = start_date + datetime.timedelta(days=1)
# prediction_dates.append(date)
# start_date = date
# plt.figure(figsize= (10,7))
# plt.xlabel("Dates",fontsize = 15)
# plt.ylabel('Cofirmed Cases',fontsize = 15)
# plt.title("Predicted Values for the next 15 Days" , fontsize = 20)
# #'#ff9999'-->pink
# plt.plot_date(y= pred,x= prediction_dates,linestyle ='dashed',color = 'c',label = 'Confirmed Forecast');
# plt.plot_date(y=pr_data['y'],x=pr_data['ds'],linestyle = '-',color = 'blue',label = 'Confirmed');
# plt.legend();
# The file header does `from datetime import datetime`, which shadows the
# module with the class. Re-import the module so `datetime.timedelta` resolves.
import datetime
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# No `global fulltable` here: the statement has no effect at module/cell scope.

# Per-date sums of the three count columns; only Date/Confirmed are modelled.
predgrp = fulltable.groupby("Date")[["Confirmed","Recovered","Deaths"]].sum().reset_index()
pred_cnfrm = predgrp.loc[:,["Date","Confirmed"]]
# pr_data is the same object as pred_cnfrm, so the rename below applies to both.
pr_data = pred_cnfrm
pr_data.columns = ['ds','y']
# fit model
model = ExponentialSmoothing(pr_data['y'])
model_fit = model.fit()
# make prediction
# NOTE(review): predict() is called without start/end, so the horizon is
# whatever statsmodels defaults to. Confirm it matches the 15 dates built
# below before plotting yhat against prediction_dates.
yhat = model_fit.predict()
# Build the 15 calendar days that follow the last observed date.
start_date = pr_data['ds'].max()
prediction_dates = []
for i in range(15):
    date = start_date + datetime.timedelta(days=1)
    prediction_dates.append(date)
    start_date = date
# # Plot Forecast
# start_date = pr_data['ds'].max()
# prediction_dates = []
# for i in range(15):
# date = start_date + datetime.timedelta(days=1)
# prediction_dates.append(date)
# start_date = date
# plt.plot_date(x=prediction_dates,y=pred_c)
# plt.show()
# Per-date sums of the count columns, with zero entries turned into NaN so
# they can be dropped before ratios are taken.
predgrp = (
    fulltable.groupby("Date")[["Confirmed", "Recovered", "Deaths"]]
    .sum()
    .reset_index()
    .replace(0, np.nan)
)
pred_cnfrm = predgrp["Confirmed"].dropna()
pred_data = pred_cnfrm.tolist()

# Ratio of each remaining confirmed count to the one before it.
growth_diff = [
    current / previous
    for previous, current in zip(pred_data, pred_data[1:])
]
# Arithmetic mean of those ratios.
growth_factor = sum(growth_diff) / len(growth_diff)
print('Average growth factor', growth_factor)
# The file header does `from datetime import datetime`, which shadows the
# module with the class. Re-import the module so `datetime.timedelta` resolves.
import datetime

# Number of days to project. Dates and case counts are produced in the same
# loop from this one value, so the two lists always have the same length.
FORECAST_DAYS = 27

dates_pk = predgrp.loc[:, "Date"]

# pred_cnfrm has had rows dropped, so its labels may stop short of the last row
# of predgrp. Anchor both the start date and the starting count on the last
# row that survived, rather than on label len(dates_pk) - 1, which raises
# KeyError if that row was dropped.
last_valid_row = pred_cnfrm.index[-1]
start_date = dates_pk.loc[last_valid_row]
#start_date = dates_india[len(dates_india) - 1]
previous_day_cases = pred_cnfrm.iloc[-1]

prediction_dates = []
predicted_cases = []
for i in range(FORECAST_DAYS):
    # Next calendar day.
    date = start_date + datetime.timedelta(days=1)
    prediction_dates.append(date)
    start_date = date
    # Multiply the previous day's value by the average growth factor.
    predicted_value = previous_day_cases * growth_factor
    predicted_cases.append(predicted_value)
    previous_day_cases = predicted_value
# Plot the projected case counts against their dates on a new 10x7 figure.
plt.figure(figsize=(10, 7))

# Tick styling is applied before anything is drawn, as in the original order.
plt.xticks(rotation=90, fontsize=11)
plt.yticks(fontsize=10)

# Title and axis captions.
plt.title("Pakistan Forecast", fontsize=20)
plt.xlabel("Dates", fontsize=15)
plt.ylabel('Total cases', fontsize=15)

ax1 = plt.plot_date(
    x=prediction_dates,
    y=predicted_cases,
    color='c',
    linestyle='-',
)